{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Kernel Density Estimate of Species Distributions\n\nThis shows an example of a neighbors-based query (in particular a kernel\ndensity estimate) on geospatial data, using a Ball Tree built upon the\nHaversine distance metric -- i.e. distances over points in latitude/longitude.\nThe dataset is provided by Phillips et. al. (2006).\nIf available, the example uses\n`basemap <https://matplotlib.org/basemap/>`_\nto plot the coast lines and national boundaries of South America.\n\nThis example does not perform any learning over the data\n(see `sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` for\nan example of classification based on the attributes in this dataset).  It\nsimply shows the kernel density estimate of observed data points in\ngeospatial coordinates.\n\nThe two species are:\n\n - `\"Bradypus variegatus\"\n   <http://www.iucnredlist.org/apps/redlist/details/3038/0>`_ ,\n   the Brown-throated Sloth.\n\n - `\"Microryzomys minutus\"\n   <http://www.iucnredlist.org/details/13408/0>`_ ,\n   also known as the Forest Small Rice Rat, a rodent that lives in Peru,\n   Colombia, Ecuador, Peru, and Venezuela.\n\nReferences\n----------\n\n * `\"Maximum entropy modeling of species geographic distributions\"\n   <http://rob.schapire.net/papers/ecolmod.pdf>`_\n   S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,\n   190:231-259, 2006.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author: Jake Vanderplas <jakevdp@cs.washington.edu>\n#\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.datasets import fetch_species_distributions\nfrom sklearn.neighbors import KernelDensity\n\n# if basemap is available, we'll use it.\n# otherwise, we'll improvise later...\ntry:\n    from mpl_toolkits.basemap import Basemap\n    basemap = True\nexcept ImportError:\n    basemap = False\n\n\ndef construct_grids(batch):\n    \"\"\"Construct the map grid from the batch object\n\n    Parameters\n    ----------\n    batch : Batch object\n        The object returned by :func:`fetch_species_distributions`\n\n    Returns\n    -------\n    (xgrid, ygrid) : 1-D arrays\n        The grid corresponding to the values in batch.coverages\n    \"\"\"\n    # x,y coordinates for corner cells\n    xmin = batch.x_left_lower_corner + batch.grid_size\n    xmax = xmin + (batch.Nx * batch.grid_size)\n    ymin = batch.y_left_lower_corner + batch.grid_size\n    ymax = ymin + (batch.Ny * batch.grid_size)\n\n    # x coordinates of the grid cells\n    xgrid = np.arange(xmin, xmax, batch.grid_size)\n    # y coordinates of the grid cells\n    ygrid = np.arange(ymin, ymax, batch.grid_size)\n\n    return (xgrid, ygrid)\n\n\n# Get matrices/arrays of species IDs and locations\ndata = fetch_species_distributions()\nspecies_names = ['Bradypus Variegatus', 'Microryzomys Minutus']\n\nXtrain = np.vstack([data['train']['dd lat'],\n                    data['train']['dd long']]).T\nytrain = np.array([d.decode('ascii').startswith('micro')\n                  for d in data['train']['species']], dtype='int')\nXtrain *= np.pi / 180.  # Convert lat/long to radians\n\n# Set up the data grid for the contour plot\nxgrid, ygrid = construct_grids(data)\nX, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])\nland_reference = data.coverages[6][::5, ::5]\nland_mask = (land_reference > -9999).ravel()\n\nxy = np.vstack([Y.ravel(), X.ravel()]).T\nxy = xy[land_mask]\nxy *= np.pi / 180.\n\n# Plot map of South America with distributions of each species\nfig = plt.figure()\nfig.subplots_adjust(left=0.05, right=0.95, wspace=0.05)\n\nfor i in range(2):\n    plt.subplot(1, 2, i + 1)\n\n    # construct a kernel density estimate of the distribution\n    print(\" - computing KDE in spherical coordinates\")\n    kde = KernelDensity(bandwidth=0.04, metric='haversine',\n                        kernel='gaussian', algorithm='ball_tree')\n    kde.fit(Xtrain[ytrain == i])\n\n    # evaluate only on the land: -9999 indicates ocean\n    Z = np.full(land_mask.shape[0], -9999, dtype='int')\n    Z[land_mask] = np.exp(kde.score_samples(xy))\n    Z = Z.reshape(X.shape)\n\n    # plot contours of the density\n    levels = np.linspace(0, Z.max(), 25)\n    plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)\n\n    if basemap:\n        print(\" - plot coastlines using basemap\")\n        m = Basemap(projection='cyl', llcrnrlat=Y.min(),\n                    urcrnrlat=Y.max(), llcrnrlon=X.min(),\n                    urcrnrlon=X.max(), resolution='c')\n        m.drawcoastlines()\n        m.drawcountries()\n    else:\n        print(\" - plot coastlines from coverage\")\n        plt.contour(X, Y, land_reference,\n                    levels=[-9998], colors=\"k\",\n                    linestyles=\"solid\")\n        plt.xticks([])\n        plt.yticks([])\n\n    plt.title(species_names[i])\n\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}